23 #define foreach(x, v) for (typeof (v).begin() x=(v).begin(); x !=(v).end(); ++x)
24 #define For(i, a, b) for (int i=(a); i<(b); ++i)
25 #define D(x) cout << #x " is " << x << endl
27 /////////////////////////////////////////////////////////////////////////////////////////
28 // Aho-Corasick's algorithm, as explained in http://dx.doi.org/10.1145/360825.360855 //
29 /////////////////////////////////////////////////////////////////////////////////////////
31 const int MAXS
= 15 * 100 + 10; // Max number of states in the matching machine.
32 // Should be equal to the sum of the length of all keywords.
34 const int MAXC
= 256; // Number of characters in the alphabet.
36 vector
<int> out
[MAXS
]; // Output for each state.
37 // out[s] is a vector with the indeces of all keywords found when
38 // the machine reaches the state s.
40 // Used internally in the algorithm.
41 int f
[MAXS
]; // Failure function
42 int g
[MAXS
][MAXC
]; // Goto function, or -1 if fail.
44 // Builds the string matching machine.
46 // words - Vector of keywords. The index of each keyword is important:
47 // "out[state]" contains i if we just found word[i] in the text.
48 // lowestChar - The lowest char in the alphabet. Defaults to 0.
49 // highestChar - The highest char in the alphabet. Defaults to 255.
50 // "highestChar - lowestChar" must be <= MAXC, otherwise we will
51 // access the g matrix outside its bounds and things will go wrong.
53 // Returns the number of states that the new machine has.
54 // States are numbered 0 up to the return value - 1, inclusive.
55 int buildMatchingMachine(const vector
<string
> &words
, int lowestChar
= 0, int highestChar
= 255) {
56 for (int i
= 0; i
< MAXS
; ++i
) out
[i
].clear();
57 memset(f
, -1, sizeof f
);
58 memset(g
, -1, sizeof g
);
60 int states
= 1; // Initially, we just have the 0 state
62 for (int i
= 0; i
< words
.size(); ++i
) {
63 const string
&keyword
= words
[i
];
65 for (int j
= 0; j
< keyword
.size(); ++j
) {
66 int c
= keyword
[j
] - lowestChar
;
67 if (g
[currentState
][c
] == -1) { // Allocate a new node
68 g
[currentState
][c
] = states
++;
70 currentState
= g
[currentState
][c
];
72 out
[currentState
].push_back(i
);
75 // State 0 should have an outgoing edge for all characters.
76 for (int c
= 0; c
< MAXC
; ++c
) {
82 // Now, let's build the failure function
84 for (int c
= 0; c
<= highestChar
- lowestChar
; ++c
) { // Iterate over every possible input
85 // All nodes s of depth 1 have f[s] = 0
86 if (g
[0][c
] != -1 and g
[0][c
] != 0) {
92 int state
= q
.front();
94 for (int c
= 0; c
<= highestChar
- lowestChar
; ++c
) {
95 if (g
[state
][c
] != -1) {
96 int failure
= f
[state
];
97 while (g
[failure
][c
] == -1) {
100 failure
= g
[failure
][c
];
101 f
[g
[state
][c
]] = failure
;
103 for (int k
= 0; k
< out
[failure
].size(); ++k
) { // Merge output from the failure state into this one
104 out
[g
[state
][c
]].push_back(out
[failure
][k
]);
115 // Finds the next state the machine will transition to.
117 // currentState - The current state of the machine. Must be between
118 // 0 and the number of states - 1, inclusive.
119 // nextInput - The next character that enters into the machine. Should be between lowestChar
120 // and highestChar, inclusive.
121 // lowestChar - Should be the same lowestChar that was passed to "buildMatchingMachine".
123 // Returns the next state the machine will transition to. This is an integer between
124 // 0 and the number of states - 1, inclusive.
125 int findNextState(int currentState
, char nextInput
, int lowestChar
= 0) {
126 int answer
= currentState
;
127 int c
= nextInput
- lowestChar
;
128 while (g
[answer
][c
] == -1) answer
= f
[answer
];
133 // How to use this algorithm:
135 // 1. Modify the MAXS and MAXC constants as appropriate.
136 // 2. Call buildMatchingMachine with the set of keywords to search for.
137 // 3. Start at state 0. Call findNextState to incrementally transition between states.
138 // 4. Check the out function to see if a keyword has been matched.
142 // Assume keywords is a vector that contains {"he", "she", "hers", "his"} and text is a string
143 // that contains "ahishers".
145 // Consider this program:
147 // buildMatchingMachine(v, 'a', 'z');
148 // int currentState = 0;
149 // for (int i = 0; i < text.size(); ++i) {
150 // currentState = findNextState(currentState, text[i], 'a');
151 // if (out[currentState.size() == 0) continue; // Nothing new, let's move on to the next character.
152 // for (int j = 0; j < out[currentState].size(); ++j) {
153 // const string &keyword = keywords[out[currentState][j]];
154 // cout << "Keyword " << keyword << " appears from "
155 // << i - keyword.size() + 1 << " to " << i << endl;
159 // The output of this program is:
161 // Keyword his appears from 1 to 3
162 // Keyword he appears from 4 to 5
163 // Keyword she appears from 3 to 5
164 // Keyword hers appears from 4 to 7
166 /////////////////////////////////////////////////////////////////////////////////////////
167 // End of Aho-Corasick's algorithm. //
168 /////////////////////////////////////////////////////////////////////////////////////////
170 vector
<string
> keywords
;
172 void simplifyKeywords() {
173 int K
= keywords
.size();
174 // Remove duplicate keywords
175 sort(keywords
.begin(), keywords
.end());
176 keywords
.resize(unique(keywords
.begin(), keywords
.end()) - keywords
.begin());
177 assert(keywords
.size() <= K
);
180 vector
<bool> rm(K
, false);
181 for (int i
= 0; i
< K
; ++i
) {
182 for (int j
= 0; j
< K
; ++j
) {
183 if (i
== j
) continue;
184 if (keywords
[i
].find(keywords
[j
]) != string::npos
) {
185 // keywords[j] is a substring of keywords[i], delete i
191 vector
<string
> newKeywords
;
192 for (int i
= 0; i
< K
; ++i
) {
193 if (!rm
[i
]) newKeywords
.push_back(keywords
[i
]);
195 keywords
= newKeywords
;
196 assert(keywords
.size() <= K
);
199 int minimumChanges(const string
&s
) {
200 vector
< pair
<int, int> > intervals
;
202 for (int j
= 0; j
< s
.size(); ++j
) {
203 state
= findNextState(state
, s
[j
]);
205 for (int k
= 0; k
< out
[state
].size(); ++k
) {
206 const string
&word
= keywords
[ out
[state
][k
] ];
207 intervals
.push_back( make_pair(j
- word
.size() + 1, j
) );
210 int ans
= 0, last
= -1;
212 for (int i
= 0; i
< intervals
.size(); ++i
) {
213 int u
= intervals
[i
].first
, v
= intervals
[i
].second
;
225 while (cin
>> K
>> L
) {
226 if (K
== 0 and L
== 0) break;
230 for (int i
= 0; i
< K
; ++i
) {
232 keywords
.push_back(line
);
233 assert(line
.size() > 0);
235 assert(keywords
.size() == K
);
237 assert(keywords
.size() <= K
);
240 buildMatchingMachine(keywords
);
243 for (int i
= 0; i
< L
; ++i
) {
245 ans
+= minimumChanges(line
);